The goals / steps of this project are the following:
import zipfile as zf
# Flip to True to unpack the training archives before loading the images.
extract_training_images = False

if extract_training_images:
    # Both archives are expected inside the `training_images` folder.
    for archive_name in ('vehicles', 'non-vehicles'):
        archive_path = 'training_images/' + archive_name + '.zip'
        with zf.ZipFile(archive_path, 'r') as archive:
            archive.extractall('training_images')
import cv2
import glob
import os
def read_images(directory, pattern):
    """
    Load every image matching `pattern` from the sub-directories of `directory`.

    The tree below `directory` is walked recursively. Files located directly
    in `directory` itself are not loaded, only those inside its
    sub-directories. Each file is read with OpenCV and converted from BGR to
    RGB.

    Returns a list with one RGB image per matching file.
    """
    image_paths = []
    for dirpath, dirnames, _ in os.walk(directory):
        for dirname in dirnames:
            # Join with `dirpath` (the folder currently being walked) rather
            # than the top-level `directory`; otherwise nested folders are
            # looked up at a location where they do not exist.
            image_paths.extend(glob.glob(os.path.join(dirpath, dirname, pattern)))
    return [cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB) for path in image_paths]
# Load both classes of the training set and report how many samples each has.
vehicles = read_images('./training_images/vehicles', '*.png')
non_vehicles = read_images('./training_images/non-vehicles', '*.png')
num_vehicles, num_non_vehicles = len(vehicles), len(non_vehicles)

print("Number of vehicle images in training set: {}".format(num_vehicles))
print("Number of non-vehicle images in training set: {}".format(num_non_vehicles))
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# 4x4 grid of axes, flattened so each cell can be addressed by a single index.
fig, axes_grid = plt.subplots(nrows=4, ncols=4, figsize=(16, 16))
axs = axes_grid.ravel()
def random_image(images, len_images):
    """
    Return one element of `images` chosen at random.

    The index is drawn with `np.random.randint` from the range
    [0, `len_images`), so `len_images` must not exceed `len(images)`.
    """
    picked_index = np.random.randint(len_images)
    return images[picked_index]
# Fill the grid with random samples, alternating vehicle / non-vehicle cells.
for i in range(16):
    ax = axs[i]
    is_vehicle_cell = i % 2 == 0
    if is_vehicle_cell:
        img = random_image(vehicles, num_vehicles)
    else:
        img = random_image(non_vehicles, num_non_vehicles)
    ax.set_title('vehicle' if is_vehicle_cell else 'non-vehicle', fontsize=10)
    ax.axis('off')
    ax.imshow(img)
from skimage.feature import hog
def bin_spatial(img, size=(32, 32)):
    """Resize `img` to `size` and return the result flattened to a 1-D vector."""
    resized = cv2.resize(img, size)
    return resized.ravel()
def color_hist(img, nbins=32, bins_range=(0, 256)):
    """
    Build a color-histogram feature vector from the first three channels of `img`.

    Each channel is histogrammed on its own with `nbins` bins over
    `bins_range`; the three count arrays are concatenated in channel order.

    Returns only the concatenated counts (the bin edges are discarded).
    """
    per_channel_counts = []
    for channel in range(3):
        counts, _edges = np.histogram(img[:, :, channel], bins=nbins, range=bins_range)
        per_channel_counts.append(counts)
    return np.concatenate(per_channel_counts)
def get_hog_features(img, orient, pix_per_cell, cell_per_block,
                     vis=False, feature_vec=True):
    """
    Compute HOG features for the single-channel image `img`.

    `orient`         : number of orientation bins.
    `pix_per_cell`   : side of a (square) cell, in pixels.
    `cell_per_block` : side of a (square) block, in cells.
    `vis`            : forwarded to `hog` as its visualisation flag; callers in
                       this file unpack a `(features, hog_image)` pair when it
                       is True.
    `feature_vec`    : forwarded to `hog` as `feature_vector`.

    Returns whatever `skimage.feature.hog` returns for these arguments.
    """
    import inspect

    # scikit-image renamed the `visualise` keyword to `visualize` and later
    # dropped the old spelling; use whichever one the installed version accepts.
    accepted = inspect.signature(hog).parameters
    vis_keyword = 'visualize' if 'visualize' in accepted else 'visualise'

    return hog(img, orientations=orient,
               pixels_per_cell=(pix_per_cell, pix_per_cell),
               cells_per_block=(cell_per_block, cell_per_block),
               transform_sqrt=False,
               feature_vector=feature_vec,
               **{vis_keyword: vis})
# Value object to hold all feature params
class FeaturesParameters(object):
    """
    Plain container for the settings used by the feature-extraction functions.

    `cspace`         : name of the color space the image is converted to.
    `orient`, `pix_per_cell`, `cell_per_block`, `hog_channel` : HOG settings.
    `size`           : target size for the spatial-binning features.
    `hist_bins`, `hist_range` : color-histogram settings.
    """

    def __init__(self, cspace='YCrCb', orient=8, pix_per_cell=8, cell_per_block=2,
                 hog_channel='ALL', size=(16, 16), hist_bins=32, hist_range=(0, 256)):
        # Color space and HOG settings
        self.cspace, self.orient = cspace, orient
        self.pix_per_cell, self.cell_per_block = pix_per_cell, cell_per_block
        self.hog_channel = hog_channel
        # Spatial binning
        self.size = size
        # Color histogram
        self.hist_bins, self.hist_range = hist_bins, hist_range
def transform_cspace(image, cspace):
    """
    Convert the RGB `image` to the color space named by `cspace`.

    'RGB' returns `image` itself, unconverted. Any other name must be one of
    'HSV', 'LUV', 'HLS', 'YUV' or 'YCrCb'; an unknown name raises `KeyError`.
    """
    conversion_codes = {
        'HSV': cv2.COLOR_RGB2HSV,
        'LUV': cv2.COLOR_RGB2LUV,
        'HLS': cv2.COLOR_RGB2HLS,
        'YUV': cv2.COLOR_RGB2YUV,
        'YCrCb': cv2.COLOR_RGB2YCrCb
    }
    if cspace != 'RGB':
        return cv2.cvtColor(image, conversion_codes[cspace])
    return image
def extract_features(image, params):
    """
    Build the full feature vector of `image` using the settings in `params`.

    The image is converted to `params.cspace`, then three feature groups are
    computed on the converted image and concatenated in this order:
    spatial binning, color histogram, HOG.

    When `params.hog_channel` is 'ALL', HOG is computed for every channel and
    flattened together; otherwise it is used as the index of the one channel
    to process.
    """
    feature_image = transform_cspace(image, params.cspace)

    def hog_of_channel(channel):
        # HOG of one channel, flattened, without the visualisation image.
        return get_hog_features(feature_image[:, :, channel],
                                params.orient, params.pix_per_cell, params.cell_per_block,
                                vis=False, feature_vec=True)

    if params.hog_channel == 'ALL':
        all_channels = range(feature_image.shape[2])
        hog_features = np.ravel([hog_of_channel(channel) for channel in all_channels])
    else:
        hog_features = hog_of_channel(params.hog_channel)

    spatial_features = bin_spatial(feature_image, params.size)
    hist_features = color_hist(feature_image, nbins=params.hist_bins, bins_range=params.hist_range)
    return np.concatenate((spatial_features, hist_features, hog_features))
# Feature-extraction settings, passed later to both training (`fitModel`)
# and detection (`findCarWindows`, `findBoxes`).
params = FeaturesParameters() # all constructor defaults
# Alternative configuration, kept for reference:
# params = FeaturesParameters(cspace='YUV',
# orient=11,
# pix_per_cell=16,
# cell_per_block=2)
def showHOG(img, title):
    """
    Plot `img` next to the HOG visualisation of each of its YCrCb channels.

    The image is always converted to YCrCb here, regardless of `params.cspace`;
    the HOG settings (orient, pix_per_cell, cell_per_block) are taken from the
    module-level `params`. `title` is used as the caption of the original image.
    """
    ycrcb = cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)

    hog_images = []
    for channel in range(3):
        _, hog_image = get_hog_features(ycrcb[:, :, channel],
                                        params.orient, params.pix_per_cell, params.cell_per_block,
                                        vis=True, feature_vec=True)
        hog_images.append(hog_image)

    fig, axes = plt.subplots(ncols=4, figsize=(15,15))
    axes[0].imshow(img)
    axes[0].set_title(title)
    captions = ('HOG - Y', 'HOG - Cr', 'HOG - Cb')
    for ax, hog_image, caption in zip(axes[1:], hog_images, captions):
        ax.imshow(hog_image, cmap='gray')
        ax.set_title(caption)
# Visualise HOG for one random sample of each class.
vehicle = random_image(vehicles, num_vehicles)
non_vehicle = random_image(non_vehicles, num_non_vehicles)
for sample, caption in ((vehicle, 'Vehicle'), (non_vehicle, 'Non-vehicle')):
    showHOG(sample, caption)
from sklearn.model_selection import train_test_split
import time
def fitModel(positive, negative, svc, scaler, params):
    """
    Train the classifier `svc` to separate `positive` from `negative` images.

    Features are extracted from every image with `extract_features` using
    `params`. The data is split 80/20 into training and test sets with a
    randomly drawn seed, then `scaler` is fitted on the training set and
    applied to both sets.

    Returns: (`svc`, fitted scaler, fittingTime, accuracy), where `fittingTime`
    is the classifier fitting time in seconds (2 decimals) and `accuracy` is
    the score on the test split (4 decimals).
    """
    positive_features = [extract_features(img, params) for img in positive]
    negative_features = [extract_features(img, params) for img in negative]

    # Stack the samples and build the labels: 1 = positive, 0 = negative
    X = np.vstack((positive_features, negative_features)).astype(np.float64)
    y = np.hstack((np.ones(len(positive_features)), np.zeros(len(negative_features))))

    # Split up data into randomized training and test sets
    rand_state = np.random.randint(0, 100)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=rand_state)

    # Fit the scaler on the training split only: fitting it on all samples
    # would leak test-set statistics into training and bias the accuracy.
    X_scaler = scaler.fit(X_train)
    X_train = X_scaler.transform(X_train)
    X_test = X_scaler.transform(X_test)

    # Fitting
    t = time.time()
    svc.fit(X_train, y_train)
    fittingTime = round(time.time() - t, 2)

    accuracy = round(svc.score(X_test, y_test), 4)
    return (svc, X_scaler, fittingTime, accuracy)
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
# Train a linear SVM on standardized features and report how it did.
svc, scaler, fittingTime, accuracy = fitModel(
    positive=vehicles,
    negative=non_vehicles,
    svc=LinearSVC(),
    scaler=StandardScaler(),
    params=params,
)
print('Fitting time: {} s, Accuracy {}'.format(fittingTime, accuracy))
# Basic functions provided in Udacity's course for creating windows and drawing boxes on an image.
def draw_boxes(img, bboxes, color=(0, 0, 255), thick=6):
    """
    Return a copy of `img` with one rectangle drawn per entry of `bboxes`.

    Each entry supplies the two opposite corners of a rectangle as its first
    two items. `color` and `thick` are passed to `cv2.rectangle`. The input
    image is left untouched.
    """
    canvas = np.copy(img)
    for box in bboxes:
        first_corner, second_corner = box[0], box[1]
        cv2.rectangle(canvas, first_corner, second_corner, color, thick)
    return canvas
# Define a function that takes an image,
# start and stop positions in both x and y,
# window size (x and y dimensions),
# and overlap fraction (for both x and y)
def slide_window(img, x_start_stop=(None, None), y_start_stop=(None, None),
                 xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    """
    List the sliding-window positions covering a region of `img`.

    `x_start_stop` / `y_start_stop` : (start, stop) pixel bounds of the search
        region on each axis; a `None` entry falls back to the image border.
        The sequences passed in are never modified.
    `xy_window`  : window size as (width, height) in pixels.
    `xy_overlap` : overlap fraction between consecutive windows as (x, y).

    Returns a list of windows, each `((startx, starty), (endx, endy))`,
    ordered row by row.
    """
    # Resolve the bounds into locals instead of writing back into the
    # arguments: mutating them would alter the caller's list and, for shared
    # defaults, leak the bounds of one image into later calls.
    x_start = 0 if x_start_stop[0] is None else x_start_stop[0]
    x_stop = img.shape[1] if x_start_stop[1] is None else x_start_stop[1]
    y_start = 0 if y_start_stop[0] is None else y_start_stop[0]
    y_stop = img.shape[0] if y_start_stop[1] is None else y_start_stop[1]

    # Compute the span of the region to be searched
    xspan = x_stop - x_start
    yspan = y_stop - y_start

    # Compute the number of pixels per step in x/y
    # (builtin int: the `np.int` alias no longer exists in current NumPy)
    nx_pix_per_step = int(xy_window[0] * (1 - xy_overlap[0]))
    ny_pix_per_step = int(xy_window[1] * (1 - xy_overlap[1]))

    # Compute the number of windows in x/y
    nx_buffer = int(xy_window[0] * xy_overlap[0])
    ny_buffer = int(xy_window[1] * xy_overlap[1])
    nx_windows = int((xspan - nx_buffer) / nx_pix_per_step)
    ny_windows = int((yspan - ny_buffer) / ny_pix_per_step)

    window_list = []
    for ys in range(ny_windows):
        starty = ys * ny_pix_per_step + y_start
        endy = starty + xy_window[1]
        for xs in range(nx_windows):
            startx = xs * nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            window_list.append(((startx, starty), (endx, endy)))
    return window_list
# Load every test image as RGB.
test_images = [
    cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    for image_path in glob.glob('./test_images/*.jpg')
]
def findCarWindows(img, clf, scaler, params, y_start_stop=[360, 700], xy_window=(64, 64), xy_overlap=(0.85, 0.85) ):
    """
    Return the sliding windows of `img` that the classifier labels as a car.

    Every window produced by `slide_window` is cropped, resized to 64x64,
    turned into a feature vector with `extract_features` (using `params`),
    scaled with `scaler` and classified with `clf`.

    `y_start_stop` : Y axis range searched for cars.
    `xy_window` : window size.
    `xy_overlap` : overlap fraction between windows.

    Returns the list of windows, each `((x1, y1), (x2, y2))`, predicted as 1.
    """
    candidates = slide_window(img, y_start_stop=y_start_stop, xy_window=xy_window, xy_overlap=xy_overlap)

    def looks_like_car(window):
        (x1, y1), (x2, y2) = window
        patch = cv2.resize(img[y1:y2, x1:x2], (64, 64))
        features = extract_features(patch, params)
        scaled_features = scaler.transform(features.reshape(1, -1))
        return clf.predict(scaled_features) == 1

    return [window for window in candidates if looks_like_car(window)]
def drawCars(img, windows):
    """
    Return a copy of `img` with every box in `windows` drawn on it.
    The input image is not modified.
    """
    canvas = np.copy(img)
    boxed = draw_boxes(canvas, windows)
    return boxed
# Raw sliding-window detections drawn on each test image.
car_on_test = [
    drawCars(frame, findCarWindows(frame, svc, scaler, params))
    for frame in test_images
]
def showImages(images, cols = 2, rows = 3, figsize=(15,13)):
    """
    Display `images` on a subplot grid with `rows` rows and `cols` columns.

    Images fill the grid row by row. Images beyond `rows * cols` are not
    shown, and grid cells without an image are left empty.
    """
    # squeeze=False always yields a 2-D array of axes, so `.flat` also works
    # for a 1x1 grid (where subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
    # zip stops at the shorter of the two sequences, which covers both
    # "fewer images than cells" and "more images than cells".
    for ax, image in zip(axes.flat, images):
        ax.imshow(image)
# Show the test images with the boxes returned by findCarWindows drawn on them.
showImages(car_on_test)
# Heat map and threshold functions from Udacity's course
def add_heat(heatmap, bbox_list):
    """
    Add 1 to every `heatmap` pixel covered by each box in `bbox_list`.

    Each box takes the form ((x1, y1), (x2, y2)). The heatmap is updated in
    place and also returned.
    """
    for box in bbox_list:
        top_left, bottom_right = box[0], box[1]
        rows = slice(top_left[1], bottom_right[1])
        columns = slice(top_left[0], bottom_right[0])
        heatmap[rows, columns] += 1
    return heatmap
def apply_threshold(heatmap, threshold):
    """
    Zero out every `heatmap` pixel whose value is at or below `threshold`.
    The heatmap is modified in place and also returned.
    """
    too_cold = heatmap <= threshold
    heatmap[too_cold] = 0
    return heatmap
def draw_labeled_bboxes(img, labels):
    """
    Draw one bounding rectangle on `img` per labelled region.

    `labels` is a pair (label array, number of regions); regions are numbered
    from 1. For each region the rectangle spans the min/max x and y of its
    pixels. `img` is drawn on in place and also returned.
    """
    label_map, car_count = labels[0], labels[1]
    for car_number in range(1, car_count + 1):
        car_pixels = (label_map == car_number).nonzero()
        # First axis of the label array is y, second is x
        ys, xs = car_pixels[0], car_pixels[1]
        top_left = (np.min(xs), np.min(ys))
        bottom_right = (np.max(xs), np.max(ys))
        cv2.rectangle(img, top_left, bottom_right, (0,0,255), 6)
    return img
from scipy.ndimage.measurements import label
def drawCarsWithLabels(img, boxes, threshHold = 4):
    """
    Draw merged car boxes on a copy of `img`.

    The candidate `boxes` are accumulated into a heatmap, pixels with a heat
    at or below `threshHold` are discarded, the remaining regions are labelled
    and one bounding box is drawn per region.

    Returns the annotated copy; `img` itself is not modified.
    """
    # The heat only depends on pixel position, so a single-channel
    # (height, width) map is enough; a full `img.shape` map would triple the
    # memory and labelling work without changing the boxes.
    heatmap = np.zeros(img.shape[:2])
    heatmap = add_heat(heatmap, boxes)
    heatmap = apply_threshold(heatmap, threshHold)
    labels = label(heatmap)
    return draw_labeled_bboxes(np.copy(img), labels)
# Sliding-window detections merged through the heatmap, drawn on each test image.
boxed_on_test = [
    drawCarsWithLabels(frame, findCarWindows(frame, svc, scaler, params))
    for frame in test_images
]
showImages(boxed_on_test)
def findBoxes(img, clf, scaler, params, y_start_stop=[350, 656], window=64, cells_per_step=1, scale=1.5 ):
    """
    Return the boxes of `img` that the classifier labels as a car, computing
    HOG once for the whole search strip instead of once per window.

    `clf`, `scaler`  : classifier and feature scaler used for each window.
    `params`         : feature-extraction parameters. `params.hog_channel` is
                       not consulted: HOG of the first three channels is
                       always used.
    `y_start_stop`   : Y axis range searched for cars.
    `window`         : window side in pixels, measured on the rescaled strip.
    `cells_per_step` : number of HOG cells the window moves per step.
    `scale`          : the strip is shrunk by this factor before searching;
                       returned boxes are scaled back to `img` coordinates.

    Returns a list of boxes, each `((x1, y1), (x2, y2))`.
    """
    # HOG parameters
    orient = params.orient
    pix_per_cell = params.pix_per_cell
    cell_per_block = params.cell_per_block

    # Color conversion, then crop (and optionally shrink) the search strip
    feature_image = transform_cspace(img, params.cspace)
    ystart, ystop = y_start_stop
    ctrans_tosearch = feature_image[ystart:ystop, :, :]
    if scale != 1:
        imshape = ctrans_tosearch.shape
        # builtin int: the `np.int` alias no longer exists in current NumPy
        ctrans_tosearch = cv2.resize(
            ctrans_tosearch, (int(imshape[1] / scale), int(imshape[0] / scale)))

    channels = [ctrans_tosearch[:, :, channel] for channel in range(3)]

    # Define blocks and steps
    nxblocks = (channels[0].shape[1] // pix_per_cell) - cell_per_block + 1
    nyblocks = (channels[0].shape[0] // pix_per_cell) - cell_per_block + 1
    nblocks_per_window = (window // pix_per_cell) - cell_per_block + 1
    # "+ 1" so that the last position, where the window touches the right or
    # bottom edge of the strip, is searched as well.
    nxsteps = (nxblocks - nblocks_per_window) // cells_per_step + 1
    nysteps = (nyblocks - nblocks_per_window) // cells_per_step + 1

    # Compute individual channel HOG features for the entire strip
    channel_hogs = [
        get_hog_features(channel, orient, pix_per_cell, cell_per_block, feature_vec=False)
        for channel in channels
    ]

    car_windows = []
    for xb in range(nxsteps):
        for yb in range(nysteps):
            ypos = yb * cells_per_step
            xpos = xb * cells_per_step

            # Extract HOG for this patch from the precomputed block arrays
            hog_features = np.hstack([
                channel_hog[ypos:ypos + nblocks_per_window,
                            xpos:xpos + nblocks_per_window].ravel()
                for channel_hog in channel_hogs
            ])

            xleft = xpos * pix_per_cell
            ytop = ypos * pix_per_cell

            # Extract the image patch and its color features
            subimg = cv2.resize(ctrans_tosearch[ytop:ytop + window, xleft:xleft + window], (64, 64))
            spatial_features = bin_spatial(subimg, size=params.size)
            hist_features = color_hist(subimg, nbins=params.hist_bins, bins_range=params.hist_range)

            # Scale features and make a prediction
            # (same feature order as extract_features: spatial, histogram, HOG)
            test_features = scaler.transform(
                np.hstack((spatial_features, hist_features, hog_features)).reshape(1, -1))
            if clf.predict(test_features) == 1:
                # Map the window back to coordinates of the original image
                xbox_left = int(xleft * scale)
                ytop_draw = int(ytop * scale)
                win_draw = int(window * scale)
                car_windows.append(((xbox_left, ytop_draw + ystart),
                                    (xbox_left + win_draw, ytop_draw + win_draw + ystart)))
    return car_windows
# Detect with the single-pass HOG search, then show the raw boxes followed by
# the heatmap-merged boxes.
fast_boxes = [findBoxes(frame, svc, scaler, params) for frame in test_images]

fast_on_test = [drawCars(frame, boxes) for frame, boxes in zip(test_images, fast_boxes)]
showImages(fast_on_test)

fast_on_test = [
    drawCarsWithLabels(frame, boxes, threshHold=1)
    for frame, boxes in zip(test_images, fast_boxes)
]
showImages(fast_on_test)
from moviepy.editor import VideoFileClip
from functools import reduce
import collections
class HeatHistory():
    """Holder for a single `history` list, which starts out empty."""

    def __init__(self):
        self.history = list()
def processVideo(inputVideo, outputVideo, frames_to_remember=3, threshhold=1):
    """
    Find the cars in the video `inputVideo` and save the annotated video to
    `outputVideo` (without audio).

    `frames_to_remember` : number of most recent per-frame heatmaps that are
                           summed before thresholding.
    `threshhold`         : summed heat at or below this value is discarded.

    Detection uses the module-level `svc`, `scaler` and `params`.
    """
    heatmaps = collections.deque(maxlen=frames_to_remember)

    def pipeline(img):
        boxes = findBoxes(img, svc, scaler, params)
        # One heat value per pixel is enough, so allocate (height, width)
        # rather than the full 3-channel image shape for every stored frame.
        heatmaps.append(add_heat(np.zeros(img.shape[:2]), boxes))
        # sum() starts from 0, so it yields a fresh array; thresholding it in
        # place does not touch the heatmaps kept in the history.
        heat_map = apply_threshold(sum(heatmaps), threshhold)
        labels = label(heat_map)
        return draw_labeled_bboxes(np.copy(img), labels)

    myclip = VideoFileClip(inputVideo)
    output_video = myclip.fl_image(pipeline)
    output_video.write_videofile(outputVideo, audio=False)
# Annotate the project video, summing the heatmaps of the last 10 frames and
# discarding summed heat at or below 2.
processVideo('./project_video.mp4', './project_video_output.mp4', frames_to_remember=10, threshhold=2)